Read in data and parameter estimates

# Import raw data ----
# The experiment was administered by MATLAB; the .mat file has already been
# preprocessed into a melted, long-format data frame by importE1data.Rmd.
dataPath <- file.path("Data/")
data <- readRDS(
  file.path(dataPath, "backwards2E1_rawDataFromMAT.rda")
)

# Work on a copy without the letterSeq column, because dplyr won't support it.
df <- data
df[["letterSeq"]] <- NULL

# Parameter estimates, read from a comma-separated file with a header row.
estimates <- read.csv(
  file = "Results/backwards2E1_paramEstimates.csv",
  header = TRUE,
  sep = ","
)

Sanity-check some data by graphing.

Investigate whether the pathological identicalness of the inverted and canonical conditions is already present at this point (i.e., in the raw data).

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Sanity check on a subset of the data: subjects whose ID sorts before "AC".
dg <- df %>% filter(subject < "AC")

# Number of items in a stream, taken as the length of the first row of letterSeq.
numItemsInStream <- length(data$letterSeq[1, ])
# Range of serial position errors (SPE) shown on the x axis.
minSPE <- -17; maxSPE <- 17
library(ggplot2)

# One label row per facet panel. Labelling from the trial-level data instead
# would draw the subject name once per trial, overwriting itself many times.
panelLabels <- distinct(dg, subject, orientation, stream)

g <- ggplot(dg, aes(x = SPE)) +
  facet_grid(subject + orientation ~ stream) # , scales="free_y")
g <- g + geom_histogram(binwidth = 1, color = "grey90") + xlim(minSPE, maxSPE)
# Inset subject name/number, drawn exactly once in each panel.
g <- g + geom_text(
  data = panelLabels, aes(label = subject),
  x = 12, y = 33, inherit.aes = FALSE
)
show(g)

Let’s inspect histogram plots and fits. Check exclusions

Add R parameter estimates to dataframe.

Calculate curves for parameters, to plot on histograms.

library(mixRSVP)

# Fitted curves for every condition, to be overlaid on the histograms.
curves <- df %>%
  group_by_at(.vars = condtnVariableNames) %>%
  do(calc_curves_dataframe(., minSPE, maxSPE, numItemsInStream))

# Number of observations in each condition. This is needed only for scaling
# the fine-grained Gaussian (gaussianScaledforData needs to know it).
dfGroups <- df %>%
  group_by_at(.vars = condtnVariableNames) %>%
  summarise(nPerCond = n())
# Add nPerCond back to the parameter estimates.
estimates <- merge(estimates, dfGroups)

# Step size passed to gaussian_scaled_from_df for the fine-grained Gaussian.
grain <- .05
gaussFine <- estimates %>%
  group_by_at(.vars = condtnVariableNames) %>%
  do(gaussian_scaled_from_df(., minSPE, maxSPE, grain))

Calculate how many graphs we need to plot.

# Number of condition groups (one row of dfGroups per group).
# Alternative: length(table(df$orientation,df$subject,df$stream))
numGroups <- nrow(dfGroups)
print(paste("Num groups = ", numGroups))
## [1] "Num groups =  180"
# Font size shrinks as the number of groups grows.
fontSz <- 400 / numGroups

Define a function to plot a bunch of subjects.

library(ggplot2)

# Plot SPE histograms with fitted curves for a bunch of subjects.
#
# Builds one panel per subject/orientation (rows) and stream (columns), draws
# the histogram of SPE, and overlays the combined fit (points), the guessing
# component and the discretized Gaussian component (lines).
#
# Args:
#   df:     trial-level data; must contain SPE, subject, orientation, stream.
#   curves: curve data with columns x, combinedFitFreq, guessingFreq,
#           gaussianFreq and pLRtest (presumably also the facet variables so
#           the layers land in the right panels -- confirm against
#           calc_curves_dataframe).
#
# Returns: the ggplot object (not yet shown).
#
# Note: reads minSPE and maxSPE from the calling environment.
plotBunch <- function(df, curves) {

  # One label row per facet panel. Labelling from the trial-level data would
  # draw the subject name once per trial, overwriting itself many times.
  panelLabels <- dplyr::distinct(df, subject, orientation, stream)

  g <- ggplot(df, aes(x = SPE)) +
    facet_grid(subject + orientation ~ stream) # , scales="free_y")
  g <- g + geom_histogram(binwidth = 1, color = "grey90") + xlim(minSPE, maxSPE)
  # Inset subject name/number, drawn exactly once in each panel.
  g <- g + geom_text(
    data = panelLabels, aes(label = subject),
    x = 12, y = 33, inherit.aes = FALSE
  )
  g <- g + theme_apa() #+theme(panel.grid.minor=element_blank(),panel.grid.major=element_blank())# hide all gridlines.
  #g<-g+ theme(line=element_blank(), panel.border = element_blank())
  sz <- .8
  # Plot the underlying Gaussian, not just the discretized Gaussian. But it's
  # way too tall. I don't know if this is a scaling problem or what actually
  # is going on.
  #g<-g + geom_line(data=gaussFine,aes(x=x,y=gaussianFreq),color="darkblue",size=1.2)

  g <- g + geom_point(data = curves, aes(x = x, y = combinedFitFreq),
                      color = "chartreuse3", size = sz * 2.5)
  g <- g + geom_line(data = curves, aes(x = x, y = guessingFreq),
                     color = "yellow", size = sz)
  # Discretized Gaussian
  g <- g + geom_line(data = curves, aes(x = x, y = gaussianFreq),
                     color = "lightblue", size = sz)

  # mixSig - whether the mixture model is statistically significantly better
  # than guessing. annotate_fit uses this to color the p-value.
  curves <- dplyr::mutate(curves, mixSig = pLRtest <= .05)
  g <- annotate_fit(g, curves) # assumes curves includes efficacy,latency,precision
  # Somehow which mixSig value (TRUE or FALSE) is red and which is green is
  # flipped relative to plot_hist_with_fit even though identical commands are
  # used. I haven't been able to work out why.
  # NOTE(review): unnamed values are assigned in level order, so a bunch where
  # only one mixSig value occurs would get "red" regardless -- confirm against
  # annotate_fit before switching to named values.
  g <- g + scale_color_manual(values = c("red", "forestgreen"))
  g
}  
# A good number of rows for a figure of height 100 in html is 32.
# Each subject gets 2 rows, so plot the subjects in groups of 16.
subjectsPerGraph <- 16
subjects <- unique(df$subject)
numSs <- length(subjects)
# Index (into subjects) of the first subject of each graph; empty when there
# are no subjects, so the loop below simply does nothing.
subjectBreaks <- if (numSs > 0) {
  seq(1, numSs, by = subjectsPerGraph)
} else {
  integer(0)
}

for (firstIdx in subjectBreaks) {
  lastIdx <- min(firstIdx + subjectsPerGraph - 1, numSs)
  subjectsThis <- subjects[firstIdx:lastIdx]
  # Select by membership rather than by an inclusive ID range: a range filter
  # puts each boundary subject in two consecutive graphs and depends on the
  # subject IDs sorting in the same order as unique() returns them.
  dg <- df %>% filter(subject %in% subjectsThis)
  curvesThis <- curves %>% filter(subject %in% subjectsThis)

  h <- plotBunch(dg, curvesThis)
  show(h)
}
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

g=ggplot(dg, aes(x=SPE)) + facet_grid(subject+orientation~stream) #,  scales="free_y")
g<-g+geom_histogram(binwidth=1,color="grey90") + xlim(minSPE,maxSPE)
g<-g+ geom_text(x=12, y= 33, aes(label = subject)) #inset subject name/number. Unfortunately it overwrites itself a million times
g<-g +theme_apa() #+theme(panel.grid.minor=element_blank(),panel.grid.major=element_blank())# hide all gridlines.
#g<-g+ theme(line=element_blank(), panel.border = element_blank())
sz=.8
#Plot the underlying Gaussian , not just the discretized Gaussian. But it's way too tall. I don't know if this is
#a scaling problem or what actually is going on.
#g<-g + geom_line(data=gaussFine,aes(x=x,y=gaussianFreq),color="darkblue",size=1.2)

g<-g+ geom_point(data=curves,aes(x=x,y=combinedFitFreq),color="chartreuse3",size=sz*2.5)
g<-g+ geom_line(data=curves,aes(x=x,y=guessingFreq),color="yellow",size=sz)
#Discretized Gaussian
g<-g+ geom_line(data=curves,aes(x=x,y=gaussianFreq),color="lightblue",size=sz)

#mixSig - whether mixture model statistically significantly better than guessing
curves <- dplyr::mutate(curves, mixSig = ifelse(pLRtest <= .05, TRUE, FALSE)) #annotate_fit uses this to color the p-value
g<- annotate_fit(g,curves) #assumes curvesDf includes efficacy,latency,precision
#Somehow the which mixSig (TRUE or FALSE) is red and which green is flipped relative to plot_hist_with_fit even though
#identical commands are used. I haven't been able to work out why.
g<- g + scale_color_manual(values=c("red","forestgreen"))

show(g)

```